## Legend for variables in CSR dataset used in analysis below:
# MS = member state
# YEAR = year
# COMNR = number of CSRs by coountry-year (Commission)
# COMWRD = number of words used to formulate CSRs by country-year (Commission)
# PARELECT = number of months until next election in MS parliament
# VOTE = Voting power
# EMU = Dummy for EMU membership
# CURACC3Y = Current account indicator
# UNEMP3Y = Unemployment indicator
# NIIP = Net international investment position indicator
# EDPCOR = Dummy for corrective phase of EDP
# WVAR = weighted variance of EU opinions in Eurobarometer by country-year (polarisation)
# WKUR = weighted kurtosis of EU opinions in Eurobarometer by country-year (polarisation)
# WPOSLR = weighted MS cabinet (government) position on economic left-right dimension
# WPOSEU = weighted MS cabinet (government) position on pro-anti EU dimension
# REER3Y = Real effective exchange rate indicator
# FSL = Financial sector liabilities indicator
# GGD = General gross government debt indicator
# HPI = House price index indicator
# ULC3Y = Nominal unit labour cost indicator
# AR3Y = Activity rate indicator
# LTUR3Y = Long-term unemployment rate indicator
# YUR3Y = Youth unemployment rate indicator
# PSCF = Private sector credit flow indicator
# PSD = Private sector debt indicator
# EMS5Y = Export market share indicator

setwd("[insert your working directory here]")
getwd()

## update.packages()
## Loading packages:
## install.packages("foreign")
## install.packages("nlme")
## install.packages("lme4")
## install.packages("rio")
## install.packages("plyr")
## install.packages("dplyr")
## install.packages("ggplot2")
## install.packages("arm")
## install.packages("lmtest")
## install.packages("colorspace")

library("foreign")
library("nlme")
library("lme4")
library("rio")
library("plyr")
library("dplyr")
library("ggplot2")
library("arm")
library("lmtest")
library("lmerTest")

## Importing Data:
CSR <- read.csv("CSR Dataset CSV.csv", header = TRUE, sep = ";", dec = ",")
names(CSR)
dim(CSR)

## No. of groups and no. of observations per group:
table(CSR$MS)
length(unique(CSR$MS))
table(CSR$YEAR)
length(unique(CSR$YEAR))

## Removing NA's on outcome variable
CSR <- CSR[!(is.na(CSR$COMWRD)),]

## Reducing positive skew in COMWRD through log transformation:
CSR$logCOMWRD <- log(CSR$COMWRD)
qqnorm(CSR$COMWRD)
qqline(CSR$COMWRD)
hist(CSR$COMWRD)
qqnorm(CSR$logCOMWRD)
qqline(CSR$logCOMWRD)
hist(CSR$logCOMWRD)

## Recoding predictor variables where necessary:
CSR$VOTEp <- CSR$VOTE*100                 ## Recoding voting power to indicate percentages

CSR$mobil <- ifelse(CSR$PPAR...4 > 0, 1, 0) ## Creating dummy variable for presence of Eurosceptic challenger party (mobilisation)

## Merging topic model data with CSR dataset
topicdata <- read.csv2("postTopics1.csv", header = T) ## This dataset includes outcomes of qualitatively validated topic model at the level of the individual recommendation (N = 824)
names(topicdata) <- c("text", "No.", "ID", "X1", "X2","X3","X4","X5","X6","X7","X8","X9","X10",
                      "X11", "X12","X13","X14","X15","X16","X17","X18","X19","X20",
                      "X21", "X22","X23","X24","X25","X26","X27","X28","X29","X30", "ms", "year", "score", "max")

topicdata$No. <- as.numeric(gsub("\\w*\\d{4}_", "", topicdata$ID))
topicdata$ID <- gsub("_\\d", "", topicdata$ID)

## Creating scores for each social topic first
topicdata$soc3 <- as.numeric(topicdata$X3) 
topicdata$soc5 <- as.numeric(topicdata$X5)
topicdata$soc14 <- as.numeric(topicdata$X14)
topicdata$soc22 <- as.numeric(topicdata$X22)
topicdata$soc23 <- as.numeric(topicdata$X23)
topicdata$soc28 <- as.numeric(topicdata$X28)

## Creating single mean measure of social topics in CSRs by year to be used as outcome variable (see appendix C for details)
topicdata$soc <- 1:nrow(topicdata)

recmeans <- topicdata %>%
  dplyr::group_by(ID) %>%
  dplyr::summarise(soc            = sum(soc3, soc5, soc14, soc22, soc23, soc28),
                   No.            = max(No.))
recmeans$socrel <- recmeans$soc/recmeans$No.
summary(recmeans$socrel)
cor(recmeans$soc, recmeans$socrel)

CSR <- merge(CSR, recmeans, by = "ID")

## Creating bins to create MIP breach variables, see http://ec.europa.eu/eurostat/web/macroeconomic-imbalances-procedure/indicators
CSR$CURACCcut <- cut(CSR$CURACC3Y, breaks = c(-Inf, -4, 5.9999999, Inf), labels = c("below", "within", "above"))
CSR$CURACCN <- as.numeric(CSR$CURACCcut == "below")
CSR$CURACCZ <- as.numeric(CSR$CURACCcut == "within")
CSR$CURACCP <- as.numeric(CSR$CURACCcut == "above")
CSR$CURACCB <- as.numeric(CSR$CURACCcut == "above") + as.numeric(CSR$CURACCcut == "below")
table(CSR$CURACCcut)
table(CSR$CURACCB)

CSR$NIIPcut <- cut(CSR$NIIP, breaks = c(-Inf, -35, Inf), labels = c("below", "within"))
CSR$NIIPN <- as.numeric(CSR$NIIPcut == "below")
CSR$NIIPZ <- as.numeric(CSR$NIIPcut == "within")
CSR$NIIPB <- as.numeric(CSR$NIIPcut == "below")
table(CSR$NIIPcut)
table(CSR$NIIPB)

CSR$EMScut <- cut(CSR$EMS5Y, breaks = c(-Inf, -6, Inf), labels = c("below", "within"))
CSR$EMSN <- as.numeric(CSR$EMScut == "below")
CSR$EMSZ <- as.numeric(CSR$EMScut == "within")
CSR$EMSB <- as.numeric(CSR$EMScut == "below")
table(CSR$EMScut)
table(CSR$EMSB)

CSR$PSDcut <- cut(CSR$PSD, breaks = c(-Inf, 132.999999999, Inf), labels = c("within", "above"))
CSR$PSDZ <- as.numeric(CSR$PSDcut == "within")
CSR$PSDP <- as.numeric(CSR$PSDcut == "above")
CSR$PSDB <- as.numeric(CSR$PSDcut == "above")
table(CSR$PSDcut)
table(CSR$PSDB)

CSR$PSCFcut <- cut(CSR$PSCF, breaks = c(-Inf, 13.99999999, Inf), labels = c("within", "above"))
CSR$PSCFZ <- as.numeric(CSR$PSCFcut == "within")
CSR$PSCFP <- as.numeric(CSR$PSCFcut == "above")
CSR$PSCFB <- as.numeric(CSR$PSCFcut == "above")
table(CSR$PSCFcut)
table(CSR$PSCFB)

CSR$HPIcut <- cut(CSR$HPI, breaks = c(-Inf, 5.99999999, Inf), labels = c("within", "above"))
CSR$HPIZ <- as.numeric(CSR$HPIcut == "within")
CSR$HPIP <- as.numeric(CSR$HPIcut == "above")
CSR$HPIB <- as.numeric(CSR$HPIcut == "above")
table(CSR$HPIcut)
table(CSR$HPIB)

CSR$GGDcut <- cut(CSR$GGD, breaks = c(-Inf, 59.99999999, Inf), labels = c("within", "above"))
CSR$GGDZ <- as.numeric(CSR$GGDcut == "within")
CSR$GGDP <- as.numeric(CSR$GGDcut == "above")
CSR$GGDB <- as.numeric(CSR$GGDcut == "above")
table(CSR$GGDcut)
table(CSR$GGDB)

CSR$UNEMPcut <- cut(CSR$UNEMP3Y, breaks = c(-Inf, 9.99999999, Inf), labels = c("within", "above"))
CSR$UNEMPZ <- as.numeric(CSR$UNEMPcut == "within")
CSR$UNEMPP <- as.numeric(CSR$UNEMPcut == "above")
CSR$UNEMPB <- as.numeric(CSR$UNEMPcut == "above")
table(CSR$UNEMPcut)
table(CSR$UNEMPB)

CSR$FSLcut <- cut(CSR$FSL, breaks = c(-Inf, 16.4999999, Inf), labels = c("within", "above"))
CSR$FSLZ <- as.numeric(CSR$FSLcut == "within")
CSR$FSLP <- as.numeric(CSR$FSLcut == "above")
CSR$FSLB <- as.numeric(CSR$FSLcut == "above")
table(CSR$FSLcut)
table(CSR$FSLB)

CSR$YURcut <- cut(CSR$YUR3Y, breaks = c(-Inf, 0.19999999, Inf), labels = c("within", "above"))
CSR$YURZ <- as.numeric(CSR$YURcut == "within")
CSR$YURP <- as.numeric(CSR$YURcut == "above")
CSR$YURB <- as.numeric(CSR$YURcut == "above")
table(CSR$YURcut)
table(CSR$YURB)

CSR$LTURcut <- cut(CSR$LTUR3Y, breaks = c(-Inf, 0.499999999, Inf), labels = c("within", "above"))
CSR$LTURZ <- as.numeric(CSR$LTURcut == "within")
CSR$LTURP <- as.numeric(CSR$LTURcut == "above")
CSR$LTURB <- as.numeric(CSR$LTURcut == "above")
table(CSR$LTURcut)
table(CSR$LTURB)

CSR$ARcut <- cut(CSR$AR3Y, breaks = c(-Inf, -0.2, Inf), labels = c("below", "within"))
CSR$ARN <- as.numeric(CSR$ARcut == "below")
CSR$ARZ <- as.numeric(CSR$ARcut == "within")
CSR$ARB <- as.numeric(CSR$ARcut == "below")
table(CSR$ARcut)
table(CSR$ARB)

# Create DFs for Euro area and non-Euro area, to apply correct MIP thresholds in each group
CSRNEA <- CSR[CSR$EMU == 0, ]
CSREA <- CSR[CSR$EMU == 1, ]

## EURO AREA BINS
CSREA$ULCcut <- cut(CSREA$ULC3Y, breaks = c(-Inf, 8.99999999, Inf), labels = c("within", "above"))
CSREA$ULCZ <- as.numeric(CSREA$ULCcut == "within")
CSREA$ULCP <- as.numeric(CSREA$ULCcut == "above")
CSREA$ULCB <- as.numeric(CSREA$ULCcut == "above")
table(CSREA$ULCcut)
table(CSREA$ULCB)

CSREA$REERcut <- cut(CSREA$REER3Y, breaks = c(-Inf, -5, 4.9999999, Inf), labels = c("below", "within", "above"))
CSREA$REERN <- as.numeric(CSREA$REERcut == "below")
CSREA$REERZ <- as.numeric(CSREA$REERcut == "within")
CSREA$REERP <- as.numeric(CSREA$REERcut == "above")
CSREA$REERB <- as.numeric(CSREA$REERcut == "above") + as.numeric(CSREA$REERcut == "below")
table(CSREA$REERcut)
table(CSREA$REERB)

## NON-EURO AREA BINS
CSRNEA$ULCcut <- cut(CSRNEA$ULC3Y, breaks = c(-Inf, 11.99999999, Inf), labels = c("within", "above"))
CSRNEA$ULCZ <- as.numeric(CSRNEA$ULCcut == "within")
CSRNEA$ULCP <- as.numeric(CSRNEA$ULCcut == "above")
CSRNEA$ULCB <- as.numeric(CSRNEA$ULCcut == "above")
table(CSRNEA$ULCcut)
table(CSRNEA$ULCB)

CSRNEA$REERcut <- cut(CSRNEA$REER3Y, breaks = c(-Inf, -11, 10.9999999, Inf), labels = c("below", "within", "above"))
CSRNEA$REERN <- as.numeric(CSRNEA$REERcut == "below")
CSRNEA$REERZ <- as.numeric(CSRNEA$REERcut == "within")
CSRNEA$REERP <- as.numeric(CSRNEA$REERcut == "above")
CSRNEA$REERB <- as.numeric(CSRNEA$REERcut == "above") + as.numeric(CSRNEA$REERcut == "below")
table(CSRNEA$REERcut)
table(CSRNEA$REERB)

CSR <- rbind(CSREA, CSRNEA)

## Creating required functions:
twoSD <- function (x) {
  (x - mean(x, na.rm = TRUE))/(2 * sd(x, na.rm = TRUE))}   ## Standardizing by 2sd & mean centering

getSE <- function (x) {                                    ## shows SE
  sqrt(diag(vcov(x)))}

meanc <- function (x) {                                    ## Normal mean centering
  (x - mean(x, na.rm = TRUE))}

## In the following transformations, I standardize all non-binary variables by 2 SD.
## Grand mean centering all level 1 predictor variables:
grand_mean_variables <- CSR %>%
  group_by(MS) %>%
  summarise(VOTE_gcent     = mean(VOTEp, na.rm = TRUE),
            EMU_gcent      = mean(EMU, na.rm = TRUE),
            EDPCOR_gcent   = mean(EDPCOR, na.rm = TRUE),
            GDPEU_gcent    = mean(GDPEU, na.rm = TRUE),
            CRED_gcent     = mean(CRED, na.rm = TRUE),
            PARELECT_gcent = mean(PARELECT, na.rm = TRUE),
            WVAR_gcent     = mean(WVAR, na.rm = TRUE),
            WKUR_gcent     = mean(WKUR, na.rm = TRUE),
            WPOSLR_gcent   = mean(WPOSLR, na.rm = TRUE),
            WPOSEU_gcent   = mean(WPOSEU, na.rm = TRUE),
            mobil_gcent    = mean(mobil, na.rm = TRUE),
            NIIPB_gcent    = mean(NIIPB, na.rm = TRUE),
            CURACCB_gcent  = mean(CURACCB, na.rm = TRUE),
            EMSB_gcent     = mean(EMSB, na.rm = TRUE),
            PSDB_gcent     = mean(PSDB, na.rm = TRUE),
            PSCFB_gcent    = mean(PSCFB, na.rm = TRUE),
            HPIB_gcent     = mean(HPIB, na.rm = TRUE),
            GGDB_gcent     = mean(GGDB, na.rm = TRUE),
            UNEMPB_gcent   = mean(UNEMPB, na.rm = TRUE),
            FSLB_gcent     = mean(FSLB, na.rm = TRUE),
            YURB_gcent     = mean(YURB, na.rm = TRUE),
            LTURB_gcent    = mean(LTURB, na.rm = TRUE),
            ARB_gcent      = mean(ARB, na.rm = TRUE),
            ULCB_gcent     = mean(ULCB, na.rm = TRUE),
            REERB_gcent    = mean(REERB, na.rm = TRUE))

grand_means <- lapply(grand_mean_variables, function(x) mean(x, na.rm=TRUE))

## Means differ slightly because the grand mean is corrected for differences in observations per country
mean(CSR$WKUR)
mean(grand_mean_variables$WKUR_gcent)

CSR$VOTE_gcent     <- (CSR$VOTEp    - grand_means$VOTE_gcent) /(2 * sd(CSR$VOTEp, na.rm = TRUE))
CSR$EMU_gcent      <- CSR$EMU       - grand_means$EMU_gcent
CSR$EDPCOR_gcent   <- CSR$EDPCOR    - grand_means$EDPCOR_gcent
CSR$GDPEU_gcent    <- (CSR$GDPEU    - grand_means$GDPEU_gcent) /(2 * sd(CSR$GDPEU, na.rm = TRUE))
CSR$CRED_gcent     <- CSR$CRED      - grand_means$CRED_gcent
CSR$PARELECT_gcent <- (CSR$PARELECT - grand_means$PARELECT_gcent) /(2 * sd(CSR$PARELECT, na.rm = TRUE))
CSR$WVAR_gcent     <- (CSR$WVAR     - grand_means$WVAR_gcent) /(2 * sd(CSR$WVAR, na.rm = TRUE))
CSR$WKUR_gcent     <- (CSR$WKUR     - grand_means$WVAR_gcent) /(2 * sd(CSR$WKUR, na.rm = TRUE))
CSR$WPOSLR_gcent   <- (CSR$WPOSLR   - grand_means$WPOSLR_gcent) /(2 * sd(CSR$WPOSLR, na.rm = TRUE))
CSR$WPOSEU_gcent   <- (CSR$WPOSEU   - grand_means$WPOSEU_gcent) /(2 * sd(CSR$WPOSEU, na.rm = TRUE))
CSR$mobil_gcent    <- CSR$mobil     - grand_means$mobil_gcent
CSR$NIIPB_gcent    <- CSR$NIIPB     - grand_means$NIIPB_gcent
CSR$CURACCB_gcent  <- CSR$CURACCB   - grand_means$CURACCB_gcent
CSR$EMSB_gcent     <- CSR$EMSB      - grand_means$EMSB_gcent
CSR$PSDB_gcent     <- CSR$PSDB      - grand_means$PSDB_gcent
CSR$PSCFB_gcent    <- CSR$PSCFB     - grand_means$PSCFB_gcent
CSR$HPIB_gcent     <- CSR$HPIB      - grand_means$HPIB_gcent
CSR$GGDB_gcent     <- CSR$GGDB      - grand_means$GGDB_gcent 
CSR$UNEMPB_gcent   <- CSR$UNEMPB    - grand_means$UNEMPB_gcent
CSR$FSLB_gcent     <- CSR$FSLB      - grand_means$FSLB_gcent
CSR$YURB_gcent     <- CSR$YURB      - grand_means$YURB_gcent
CSR$LTURB_gcent    <- CSR$LTURB     - grand_means$LTURB_gcent
CSR$ARB_gcent      <- CSR$ARB       - grand_means$ARB_gcent
CSR$ULCB_gcent     <- CSR$ULCB      - grand_means$ULCB_gcent
CSR$REERB_gcent    <- CSR$REERB     - grand_means$REERB_gcent

## Descriptive statistics as dataframe
names(CSR)
CSRdesc <- CSR[ ,c("COMWRD", "socrel","VOTE","EMU",
                   "PARELECT", "WKUR", "mobil",
                   "WPOSLR", "WPOSEU", "CURACCB", "NIIPB", 
                   "EMSB", "ULCB", "REERB", "PSDB", "PSCFB", "HPIB", "GGDB", "UNEMPB", 
                   "FSLB", "ARB", "YURB", "LTURB")]

means <- sapply(CSRdesc, mean, na.rm = T)
sds <- sapply(CSRdesc, sd, na.rm = T)
mins <- sapply(CSRdesc, min, na.rm = T)
maxs <- sapply(CSRdesc, max, na.rm = T)
descs <- data.frame(cbind(names(CSRdesc),round(means, 3), round(sds, 3), round(mins, 3), round(maxs, 3)))
#write.csv2(descs, file = "descriptives.csv")

## Plots for log transformation of outcome variable (COMWRD)
max(CSR$COMWRD)
breaks1 <- seq(0,1200,by=25)
ggplot(CSR,aes(x=COMWRD))+
  geom_histogram(breaks=breaks1, aes(y=..density..), position = "identity",
                 alpha=0.3, fill='gray', colour='black') +
  stat_function(fun=dnorm, size = 1, args=list(mean=mean(CSR$COMWRD), sd=sd(CSR$COMWRD)))+
  ggtitle("Number of words proposed by Commission (EU-27): Raw measure") + xlab("Number of words (bins of 25)")

summary(CSR$logCOMWRD)
breaks2 <- seq(2,8,by=0.1)
ggplot(CSR,aes(x=logCOMWRD))+
  geom_histogram(breaks=breaks2, aes(y=..density..), position = "identity",
                 alpha=0.3, fill='gray', colour='black') +
  stat_function(fun=dnorm, size = 1, args=list(mean=mean(CSR$logCOMWRD), sd=sd(CSR$logCOMWRD)))+
  ggtitle("Number of words proposed by Commission (EU-27): Log transformation") + xlab("Log(number of words)")


##################################################################################################################
############################################## MULTILEVEL MODELING ###############################################
##################################################################################################################

## Dataset with all variables as included in article
CSRmv <- CSR[ ,c("MS","YEAR","logCOMWRD",  "socrel", "VOTE_gcent","EMU_gcent","EDPCOR_gcent","GDPEU_gcent",
                 "CRED_gcent","PARELECT_gcent", "WVAR_gcent", "WKUR_gcent", "WPOSLR_gcent", "WPOSEU_gcent", 
                 "NIIPB_gcent", "CURACCB_gcent", "EMSB_gcent", "HPIB_gcent", "GGDB_gcent", "UNEMPB_gcent", 
                   "YURB_gcent", "LTURB_gcent", "ARB_gcent", "ULCB_gcent", "REERB_gcent", "mobil_gcent")]

## Removing NA's through listwise deletion
summary(CSRmv)
CSRmv <- na.omit(CSRmv)

## Specifying empty model for no. words (A0)
model.word0 <- lmer(logCOMWRD ~ 1 +
                      (1 | MS) + (1 | YEAR),
                    REML = FALSE,
                    data = CSRmv)
summary(model.word0)

## Specifying empty model for models on social investment (B0)
model.soc0 <- lmer(socrel ~ 1 +
                     (1 | MS) + (1 | YEAR),
                   REML = FALSE,
                   data = CSRmv)
summary(model.soc0)

## Calculating ICC for A0
MSvar        <- 0.16734
YEARvar      <- 0.27852
level1var    <- 0.08688
level2var    <- MSvar + YEARvar
totalvar     <- MSvar + YEARvar + level1var
ICClvl2      <- level2var/totalvar
ICClvl2
ICCMStotal   <- MSvar/totalvar
ICCMStotal
ICCYEARtotal <- YEARvar/totalvar
ICCYEARtotal

## Calculating ICC for B0
MSvar        <- 0.002772
YEARvar      <- 0.00000
level1var    <- 0.008211
level2var    <- MSvar + YEARvar
totalvar     <- MSvar + YEARvar + level1var
ICClvl2      <- level2var/totalvar
ICClvl2
ICCMStotal   <- MSvar/totalvar
ICCMStotal
ICCYEARtotal <- YEARvar/totalvar
ICCYEARtotal

## Model A1
model.word1 <- lmer(logCOMWRD ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                        EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                        (1 | MS) + (1 | YEAR),
                      REML = FALSE,
                      data = CSRmv)

## Model A2
model.word2 <- lmer(logCOMWRD ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                         EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                         WKUR_gcent + mobil_gcent +
                         (1 | MS) + (1 | YEAR),
                       REML = FALSE,
                       data = CSRmv)

## Model A3
model.word3 <- lmer(logCOMWRD ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                        EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                        WKUR_gcent + mobil_gcent + PARELECT_gcent + WPOSEU_gcent + WPOSLR_gcent +
                        (1 | MS) + (1 | YEAR),
                      REML = FALSE,
                      data = CSRmv)

anova(model.word0, model.word1, model.word2, model.word3)
summary(model.word3)
cov2cor(vcov(model.word3))
exp(fixef(model.word3))
#confint.merMod(model.word3)
#confint.merMod(model.word3, level = 0.99)
#confint.merMod(model.word3, level = 0.999)

## Adds interaction between kurtosis and voting power (not reported in article)
CSRmv$interact <- CSRmv$WKUR_gcent*CSRmv$VOTE_gcent
model.word4 <- lmer(logCOMWRD ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                        EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                        WKUR_gcent + PARELECT_gcent + WPOSEU_gcent + WPOSLR_gcent + WKUR_gcent*VOTE_gcent +
                        (1 | MS) + (1 | YEAR),
                      REML = FALSE,
                      data = CSRmv)
anova(model.word0, model.word1, model.word2, model.word3, model.word4)
summary(model.word4)
cov2cor(vcov(model.word4))
exp(fixef(model.word4))
confint.merMod(model.word4)

library(interplot)
interplot(m = model.word4, var1 = "WKUR_gcent", var2 = "VOTE_gcent")

## Outputting to table
library(texreg)
a <- htmlreg(list(model.word0, model.word1, model.word2, model.word3),
             digits = 3,
             custom.coef.names = c("(Intercept)", "Political power", "EMU", "NIIP", "Gross govt. debt", "Unemployment rate", "Youth unemployment rate",
                                   "Export Market Shares", "Current Account", "Activity rate", "Unit Labour Cost",
                                   "House Price Index", "Real effective exchange rate", "Polarisation", "Mobilisation", "Election",
                                   "Govt. pos. anti-pro EU", "Govt. pos. left-right"),
             dcolumn = TRUE,
             booktabs = TRUE,
             use.packages = FALSE,
             caption = "Cross-classified multilevel models (outcome: logCOMWORD)",
             fontsize = "scriptsize",
             label = "tab:tab-01")
a

## Models B for share of social investment
## Model B1
model.soc1 <- lmer(socrel ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                      EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                      (1 | MS) + (1 | YEAR),
                    REML = FALSE,
                    data = CSRmv)

## Model B2
model.soc2 <- lmer(socrel ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                       EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                       WKUR_gcent + mobil_gcent + 
                       (1 | MS) + (1 | YEAR),
                     REML = FALSE,
                     data = CSRmv)

## Model B3
model.soc3 <- lmer(socrel ~ 1 + VOTE_gcent + EMU_gcent + NIIPB_gcent + GGDB_gcent + UNEMPB_gcent + YURB_gcent +
                       EMSB_gcent + CURACCB_gcent + ARB_gcent + ULCB_gcent + HPIB_gcent + REERB_gcent +
                       WKUR_gcent + mobil_gcent + PARELECT_gcent + WPOSEU_gcent + WPOSLR_gcent +
                       (1 | MS) + (1 | YEAR),
                     REML = FALSE,
                     data = CSRmv)
anova(model.soc0, model.soc1, model.soc2, model.soc3, digits = 3)
summary(model.soc3)
cov2cor(vcov(model.soc3))
exp(fixef(model.soc3))
#confint.merMod(model.soc3, level = 0.90)
#confint.merMod(model.soc3)
#confint.merMod(model.soc3, level = 0.99)
#confint.merMod(model.soc3, level = 0.999)

a <- htmlreg(list(model.soc0, model.soc1, model.soc2, model.soc3),
             digits = 3,
             custom.coef.names = c("(Intercept)", "Political power", "EMU", "NIIP", "Gross govt. debt", "Unemployment rate", "Youth unemployment rate",
                                   "Export Market Shares", "Current Account", "Activity rate", "Unit Labour Cost",
                                   "House Price Index", "Real effective exchange rate", "Polarisation", "Mobilisation", "Election",
                                   "Govt. pos. anti-pro EU", "Govt. pos. left-right"),
             dcolumn = TRUE,
             booktabs = TRUE,
             use.packages = FALSE,
             caption = "Cross-classified multilevel models (outcome: socrel)",
             fontsize = "scriptsize",
             label = "tab:tab-01")
a

a <- cbind(round(exp(fixef(model.word0)), 3),
           round(exp(fixef(model.word1)), 3),
           round(exp(fixef(model.word2)), 3),
           round(exp(fixef(model.word3)), 3))
a
#write.csv2(as.data.frame(a), file = "exps.csv")

## ASSESSING MULTICOLLINEARITY
vif.mer <- function (fit) {
  ## adapted from rms::vif
  
  v <- vcov(fit)
  nam <- names(fixef(fit))
  
  ## exclude intercepts
  ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)"))
  if (ns > 0) {
    v <- v[-(1:ns), -(1:ns), drop = FALSE]
    nam <- nam[-(1:ns)]
  }
  
  d <- diag(v)^0.5
  v <- diag(solve(v/(d %o% d)))
  names(v) <- nam
  v
}

kappa.mer <- function (fit,
                       scale = TRUE, center = FALSE,
                       add.intercept = TRUE,
                       exact = FALSE) {
  X <- fit@pp$X
  nam <- names(fixef(fit))
  
  ## exclude intercepts
  nrp <- sum(1 * (nam == "(Intercept)"))
  if (nrp > 0) {
    X <- X[, -(1:nrp), drop = FALSE]
    nam <- nam[-(1:nrp)]
  }
  
  if (add.intercept) {
    X <- cbind(rep(1), scale(X, scale = scale, center = center))
    kappa(X, exact = exact)
  } else {
    kappa(scale(X, scale = scale, center = scale), exact = exact)
  }
}

colldiag.mer <- function (fit,
                          scale = TRUE, center = FALSE,
                          add.intercept = TRUE) {
  ## adapted from perturb::colldiag, method in Belsley, Kuh, and
  ## Welsch (1980).  look for a high condition index (> 30) with
  ## more than one high variance propotion.  see ?colldiag for more
  ## tips.
  result <- NULL
  if (center) 
    add.intercept <- FALSE
  if (is.matrix(fit) || is.data.frame(fit)) {
    X <- as.matrix(fit)
    nms <- colnames(fit)
  }
  else if (class(fit) == "mer") {
    nms <- names(fixef(fit))
    X <- fit@X
    if (any(grepl("(Intercept)", nms))) {
      add.intercept <- FALSE
    }
  }
  X <- X[!is.na(apply(X, 1, all)), ]
  
  if (add.intercept) {
    X <- cbind(1, X)
    colnames(X)[1] <- "(Intercept)"
  }
  X <- scale(X, scale = scale, center = center)
  
  svdX <- svd(X)
  svdX$d
  condindx <- max(svdX$d)/svdX$d
  dim(condindx) <- c(length(condindx), 1)
  
  Phi = svdX$v %*% diag(1/svdX$d)
  Phi <- t(Phi^2)
  pi <- prop.table(Phi, 2)
  colnames(condindx) <- "cond.index"
  if (!is.null(nms)) {
    rownames(condindx) <- nms
    colnames(pi) <- nms
    rownames(pi) <- nms
  } else {
    rownames(condindx) <- 1:length(condindx)
    colnames(pi) <- 1:ncol(pi)
    rownames(pi) <- 1:nrow(pi)
  }         
  
  result <- data.frame(cbind(condindx, pi))
  zapsmall(result)
}

maxcorr.mer <- function (fit,
                         exclude.intercept = TRUE) {
  so <- summary(fit)
  corF <- so@vcov@factors$correlation
  nam <- names(fixef(fit))
  
  ## exclude intercepts
  ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)"))
  if (ns > 0 & exclude.intercept) {
    corF <- corF[-(1:ns), -(1:ns), drop = FALSE]
    nam <- nam[-(1:ns)]
  }
  corF[!lower.tri(corF)] <- 0
  maxCor <- max(corF)
  minCor <- min(corF)
  if (abs(maxCor) > abs(minCor)) {
    zapsmall(maxCor)
  } else {
    zapsmall(minCor)
  }
}

kappa.mer(model.word3)
vif.mer(model.word3)
max(vif.mer(model.word3))

kappa.mer(model.soc3)
vif.mer(model.soc3)
max(vif.mer(model.soc3))

################# Plots for no. of words ###################
mat <- matrix(c(1,3,5,2,4,6), ncol = 2)
layout(mat, widths=rep(1, ncol(mat)), heights=rep(1, ncol(mat)))

## Homogeneity of variance and normality
plot(predict(model.word3), resid(model.word3), main = "Fitted vs. Residuals",
     xlab = "Predicted", ylab = "Residual")
abline(a=0, b=0)

## Actual vs. fitted
plot(CSRmv$logCOMWRD, predict(model.word3), main = "Observed vs. Fitted",
     xlab="Observed",ylab="Predicted")
abline(a=0,b=1)

qqnorm(resid(model.word3), main="Q-Q plot for conditional residuals") 
qqline(resid(model.word3))
qqnorm(ranef(model.word3)$MS$`(Intercept)`, 
       main="Q-Q plot for the random intercept (Country)")
qqline(ranef(model.word3)$MS$`(Intercept)`)
qqnorm(ranef(model.word3)$YEAR$`(Intercept)`, 
       main="Q-Q plot for the random intercept (Year)")
qqline(ranef(model.word3)$YEAR$`(Intercept)`)

## Hist vs normal
CSRmv$resid <- resid(model.word3)
hist(resid(model.word3), density = 50, breaks = 50, prob = T, freq = F, main = "Histogram vs. Normal", xlab = "Residual")
curve(dnorm(x, mean=mean(CSRmv$resid), sd=sd(CSRmv$resid)), add=TRUE, lwd = 2)

## A3
## Homogeneity of variance and normality

mat <- matrix(c(1,3,5,2,4,6), ncol = 2)
layout(mat, widths=rep(1, ncol(mat)), heights=rep(1, ncol(mat)))

## Homogeneity of variance and normality
plot(predict(model.soc3), resid(model.soc3), main = "Fitted vs. Residuals",
     xlab = "Predicted", ylab = "Residual")
abline(a=0, b=0)

## Actual vs. fitted
plot(CSRmv$socrel, predict(model.soc3), main = "Observed vs. Fitted",
     xlab="Observed",ylab="Predicted")
abline(a=0,b=1)

qqnorm(resid(model.soc3), main="Q-Q plot for conditional residuals") 
qqline(resid(model.soc3))
qqnorm(ranef(model.soc3)$MS$`(Intercept)`, 
       main="Q-Q plot for the random intercept (Country)")
qqline(ranef(model.soc3)$MS$`(Intercept)`)
qqnorm(ranef(model.soc3)$YEAR$`(Intercept)`, 
       main="Q-Q plot for the random intercept (Year)")
qqline(ranef(model.soc3)$YEAR$`(Intercept)`)

## Hist vs normal
CSRmv$resid <- resid(model.soc3)
hist(resid(model.soc3), density = 50, breaks = 50, prob = T, freq = F, main = "Histogram vs. Normal", xlab = "Residual")
curve(dnorm(x, mean=mean(CSRmv$resid), sd=sd(CSRmv$resid)), add=TRUE, lwd = 2)

## Plotting random effects
h_MS <- as.data.frame(coef(model.word3)$MS[1])
h_MS$alpha_se <- se.ranef(model.word3)$MS
h_MS$Country <- rownames(h_MS)
h_MS$Group <- "Country"
names(h_MS)[1:2] <- c("Alpha", "Alpha_SE")

h_YEAR <- as.data.frame(coef(model.word3)$YEAR[1])
h_YEAR$alpha_se <- se.ranef(model.word3)$YEAR
h_YEAR$Year <- rownames(h_YEAR)
h_YEAR$model <- "Year"
names(h_YEAR)[1:2] <- c("Alpha", "Alpha_SE")

ggplot(h_MS, 
       aes(x      = reorder(Country, -Alpha), 
           y      = Alpha,
           ymin   = Alpha - 2 * Alpha_SE, 
           ymax   = Alpha + 2 * Alpha_SE,
           colour = Alpha)) +
  geom_pointrange(position = position_dodge(width=0.5)) +
  xlab("Member state") + ylab(expression(alpha)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_minimal() + 
  theme(legend.position = "top")

ggplot(h_YEAR, 
       aes(x      = Year, 
           y      = Alpha,
           ymin   = Alpha - 2 * Alpha_SE, 
           ymax   = Alpha + 2 * Alpha_SE,
           colour = Alpha)) +
  geom_pointrange(position = position_dodge(width=0.5)) +
  xlab("Year") + ylab(expression(alpha)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_minimal()

h_MS$exp <- exp(h_MS$Alpha)
h_YEAR$exp <- exp(h_YEAR$Alpha)

## Plotting interpretable random effects by using exp()
h_MSexp <- as.data.frame(exp(coef(model.word3)$MS[1]))
h_MSexp$alpha_se <- exp(se.ranef(model.word3)$MS)
h_MSexp$Country <- rownames(h_MS)
h_MSexp$Group <- "Country"
names(h_MSexp)[1:2] <- c("Alpha", "Alpha_SE")

h_YEARexp <- as.data.frame(exp(coef(model.word3)$YEAR[1]))
h_YEARexp$alpha_se <- exp(se.ranef(model.word3)$YEAR)
h_YEARexp$Year <- rownames(h_YEAR)
h_YEARexp$model <- "Year"
names(h_YEARexp)[1:2] <- c("Alpha", "Alpha_SE")

ggplot(h_MSexp, 
       aes(x      = reorder(Country, -Alpha), 
           y      = Alpha,
           ymin   = Alpha - 2 * Alpha_SE, 
           ymax   = Alpha + 2 * Alpha_SE,
           colour = Alpha)) +
  geom_pointrange(position = position_dodge(width=1), size = 1) +
  xlab("Member state") + ylab(expression(alpha)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_minimal() + 
  theme(legend.position = "top") +
  geom_hline(yintercept = 271.0424392, colour="black", size = 1, alpha = 0.6)

ggplot(h_YEARexp, 
       aes(x      = Year, 
           y      = Alpha,
           ymin   = Alpha - 2 * Alpha_SE, 
           ymax   = Alpha + 2 * Alpha_SE,
           colour = Alpha)) +
  geom_pointrange(position = position_dodge(width=0.5), size = 1) +
  xlab("Year") + ylab(expression(alpha)) +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_minimal() +
  geom_hline(yintercept = 271.0424392, colour="black", size = 1, alpha = 0.6)

sjp.lmer(model.word3, 
         type       = "re",
         facet.grid = FALSE,
         #sort.est  = "sort.all",
         y.offset   = .3)
